**********************
*Merge university zip code from restricted-access data to zip county crosswalk
* Build the state-abbreviation -> state-FIPS lookup that the zip/county files are merged against.
* Source of the codes: https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt
clear
insheet using "statefips_abbreviations.csv"

* The first two imported rows are discarded (presumably title/header lines - confirm against the csv).
drop in 1/2

rename v2 stabbrev
rename v3 stfips

* Left-pad one-character state codes with a zero so every code is two characters wide.
replace stfips = "0" + stfips if length(stfips)==1

save "statefips_abbreviations.dta", replace

clear

* Turn each of the ten raw zipcty text files (zipcty1.txt ... zipcty10.txt) into a piece of the
* zip -> county crosswalk and save it as zipcty1_inst.dta ... zipcty10_inst.dta.
* The files were previously processed in two identical loops (1-5 and 6-10). They are handled in one
* loop here so the parsing rules live in a single place.
forvalues i = 1/10 {
    insheet using zipcty`i'.txt

    * The first imported row is discarded (presumably a header line - confirm against the raw file).
    drop in 1

    * Each record is one fixed-width string held in v1:
    *   characters 1-5   zip code
    *   characters 24-25 state abbreviation
    *   characters 26-28 three-character county code
    *   characters 29-   county name
    gen instzip = substr(v1,1,5)
    gen instcty = substr(v1,26,3)
    gen instcty_name = substr(v1,29,.)
    gen stabbrev = substr(v1,24,2)

    * Attach the two-character state FIPS code. mergestabbrev keeps the merge result so that
    * unmatched abbreviations can be inspected later.
    merge m:1 stabbrev using "statefips_abbreviations.dta"
    rename _merge mergestabbrev

    * Build the state+county FIPS string only for rows whose state abbreviation matched the lookup.
    * Without this guard an unmatched abbreviation has an empty stfips, so the result would be the bare
    * three-character county code, which becomes a number between 1 and 999 once it is destring-ed and
    * is not a valid county FIPS. Unmatched rows now get an empty code instead.
    gen instctyfips = stfips + instcty if mergestabbrev==3

    save zipcty`i'_inst.dta, replace
    clear
}

* Stack the ten crosswalk pieces into one dataset.
use zipcty1_inst.dta
forvalues i = 2/10 {
    append using zipcty`i'_inst.dta
}

* The county FIPS code was assembled as a string. Convert it to a number for later comparisons.
destring instctyfips, replace

save allzipcty_inst.dta, replace

* Reduce to one row per (zip, county FIPS) pair. A zip that spans several counties keeps one row
* per county.
use allzipcty_inst.dta, clear
keep instctyfips instzip instcty_name
drop if missing(instzip)

egen taginstzipcty = tag(instzip instctyfips)
drop if taginstzipcty==0

save zipcty_inst_allmatches.dta, replace

* Attach every candidate institution county to the records that were missing a county FIPS.
* joinby forms all pairwise combinations within instzip, so a record whose institution zip spans
* several counties appears once per county. unmatched(both) keeps rows found on only one side.
use zipcty_inst_allmatches.dta, clear
joinby instzip using restricted_noctyfips.dta, unmatched(both)
rename _merge mergemissctymultcty

* If a zip merged to both a normal school county and an asylum county, keep the normal school
* county observation: the student is treated as having grown up in a normal school county since
* they did in part.
* NOTE(review): maxhasnorm, minhasnorm and home_hasnormalschool are not created in this file;
* presumably they arrive with restricted_noctyfips.dta - confirm.
drop if home_hasnormalschool==0 & maxhasnorm!=minhasnorm

* First-pass flag for "home county equals institution county", plus its maximum within each
* subjid/year/acerecode group. This comparison is also true when both codes are missing, and it is
* computed before the hand-entered institution counties further down are filled in.
gen homeinstsamecty_alt = (instctyfips==homectyfips)
bysort subjid year acerecode: egen maxhomeinstsamecty_alt = max(homeinstsamecty_alt)

*Fill in the institution county for the very few universities with missing zip but which have city and state.
*County names come from the StatsAmerica city-to-county finder https://www.statsamerica.org/CityCountyFinder/ (12/18/23).
*The county FIPS for each county name was then taken from https://transition.fcc.gov/oet/info/maps/census/fips/fips.txt (12/18/23).
*Exception: O'Fallon MO was found through https://explorer.naco.org/?find=true because it did not come up in StatsAmerica (12/18/23).
*
*Each line below only changes rows whose instzip is empty and whose city and state match exactly.
*instctyfips was destring-ed when the crosswalk was built, so it is compared and assigned as a number here.
*A literal written with a leading zero (06111, 09013) is therefore stored without it (6111, 9013).
*NOTE(review): city and state are not created in this file, presumably they come from restricted_noctyfips.dta - confirm.

replace instctyfips = 12021 if instzip=="" & city=="Ave Maria" & state=="FL"
replace instctyfips = 36119 if instzip=="" & city=="Briarcliff Manor" & state=="NY"
replace instctyfips = 06111 if instzip=="" & city=="Camarillo" & state=="CA"
replace instctyfips = 29107 if instzip=="" & city=="Concordia" & state=="MO"
replace instctyfips = 34013 if instzip=="" & city=="East Orange" & state=="NJ"
replace instctyfips = 12071 if instzip=="" & city=="Fort Myers" & state=="FL"
replace instctyfips = 32003 if instzip=="" & city=="Henderson" & state=="NV"
replace instctyfips = 29183 if instzip=="" & city=="O'Fallon" & state=="MO"
replace instctyfips = 13217 if instzip=="" & city=="Oxford" & state=="GA"
replace instctyfips = 34003 if instzip=="" & city=="Rutherford" & state=="NJ"
replace instctyfips = 09013 if instzip=="" & city=="Storrs" & state=="CT"
replace instctyfips = 55101 if instzip=="" & city=="Waterford" & state=="WI"

* For rows where the home county equals the institution county, record that shared code, then carry
* the largest such code to every row of the same subjid/year/acerecode group.
gen homectyfipsU = cond(homectyfips==instctyfips, homectyfips, .)
by subjid year acerecode, sort: egen maxhomectyfipsU = max(homectyfipsU)

gen instctyfipsU = cond(homectyfips==instctyfips, instctyfips, .)
by subjid year acerecode, sort: egen maxinstctyfipsU = max(instctyfipsU)

* Same-county indicator that is left missing when either code is missing, and its group maximum
* (1 when at least one candidate pairing in the group shares a county).
gen homeinstsamectyn = (instctyfips==homectyfips) if instctyfips!=. & homectyfips!=.
by subjid year acerecode, sort: egen maxhomeinstsamectyn = max(homeinstsamectyn)

* Consistency check: groups flagged as sharing a county whose two carried codes disagree.
count if maxhomeinstsamectyn==1 & maxhomectyfipsU!=maxinstctyfipsU

* Flag exactly one row per subjid/year/acerecode group.
* NOTE(review): no within-group ordering is set before this call, so which of a group's rows gets
* the flag is not controlled by this script - confirm that is acceptable for groups with no
* shared county, where the flagged row's own county codes are the ones kept.
egen tagsubjn = tag(subjid year acerecode)

*Appendix E text statistic: students with missing institution zip code, but not missing city and state
tabulate tagsubjn if instzip=="" & city!="" & state!=""
*Appendix E text statistic: how many universities do these students attend
tabulate acerecode if instzip=="" & city!="" & state!=""

* On the flagged row of each group: if any candidate pairing shares a county, use that shared
* county as both the home county and the institution county. Otherwise keep the row's own codes.
gen homectyfipswmax = cond(tagsubjn==1 & maxhomeinstsamectyn==1, maxhomectyfipsU, homectyfips)
gen instctyfipswmax = cond(tagsubjn==1 & maxhomeinstsamectyn==1, maxinstctyfipsU, instctyfips)

* Diagnostic: within-group minimum of the flag, tabulated on the flagged rows.
by subjid year acerecode, sort: egen mintagn = min(tagsubjn)
tabulate mintagn if tagsubjn==1

* Keep only the flagged row, leaving one row per subjid/year/acerecode.
drop if tagsubjn!=1

* Swap the resolved variables in under the standard names.
drop homectyfips homeinstsamecty instctyfips
rename homectyfipswmax homectyfips
rename instctyfipswmax instctyfips
rename maxhomeinstsamectyn homeinstsamecty

save noctyfips_withinstcty.dta, replace
count

* Slim version holding only the identifiers and the resolved county fields.
use noctyfips_withinstcty.dta, clear
keep subjid acerecode year instctyfips homectyfips homeinstsamecty city state

save noctyfips_withinstcty_basic.dta, replace

* Bring the resolved counties into the main data. With update replace, nonmissing values from the
* resolved file overwrite the main data's values for variables present in both. mergeaddcty keeps
* the merge result code for each row.
use choice_demog_merge_normasylum.dta, clear
merge 1:1 subjid acerecode year using noctyfips_withinstcty_basic.dta, update replace
rename _merge mergeaddcty
save choicedemog_addcty.dta, replace
